/**
 * Copyright (c) 2024 Huawei Technologies Co., Ltd.
 * This file is a part of the CANN Open Software.
 * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */

#include "kernel_operator.h"
#include "lib/matmul_intf.h"
#include "../../../../../../kernel_impl/batch_matmul_custom_impl.h"

// Byte budgets written into the cube tiling by the kernel entry below so that
// the matmul object is given the full L1 and full L0C sizes.
constexpr int32_t FULL_L1_SIZE{512 * 1024};   // 512 KiB
constexpr int32_t FULL_L0C_SIZE{128 * 1024};  // 128 KiB

/**
 * Kernel entry point of the custom batch matmul operator.
 *
 * Loads the tiling data, overrides the share-related tiling fields, registers
 * the matmul object with the pipe, then initializes and runs BatchMatmulKernel.
 *
 * @param a         GM address of the A operand (half elements, ND format, BSNGD layout).
 * @param b         GM address of the B operand (half elements, ND format, BSNGD layout;
 *                  its MatmulType sets the fourth template argument to true —
 *                  presumably the transpose flag, confirm against the MatmulType docs).
 * @param bias      GM address of the bias operand (float elements, ND format).
 * @param c         GM address of the output (float elements, ND format, BSNGD layout).
 * @param workspace GM address forwarded unchanged to BatchMatmulKernel::Init.
 * @param tiling    GM address from which GET_TILING_DATA loads the tiling data.
 */
extern "C" __global__ __aicore__ void batch_matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tiling)
{
    // Fetch the tiling data; only its cube (matmul) tiling part is used below.
    GET_TILING_DATA(opTiling, tiling);
    auto& cubeTiling = opTiling.cubeTilingData;

    // Operand descriptors for the matmul kernel.
    using AType = AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, half, false, LayoutMode::BSNGD>;
    using BType = AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, half, true, LayoutMode::BSNGD>;
    using CType = AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, float, false, LayoutMode::BSNGD>;
    using BiasType = AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, float>;

    BatchMatmulKernel<AType, BType, CType, BiasType> kernel;
    AscendC::TPipe tpipe;

    // These overrides must be in place before the matmul object is registered,
    // because registration receives a pointer to this same tiling structure.
    cubeTiling.shareMode = 0;                 // share mode 0
    cubeTiling.shareL1Size = FULL_L1_SIZE;    // whole L1
    cubeTiling.shareL0CSize = FULL_L0C_SIZE;  // whole L0C
    cubeTiling.shareUbSize = 0;               // no UB
    REGIST_MATMUL_OBJ(&tpipe, GetSysWorkSpacePtr(), kernel.matmulObj, &cubeTiling);

    // The batch count is read before Init, exactly as the original sequence did.
    int batchCount = cubeTiling.BatchNum;

    // Bind the global buffers and tiling to the kernel.
    kernel.Init(a, b, bias, c, workspace, cubeTiling);

    // Run the batched matmul; the same batch count is supplied for both count
    // arguments. NOTE(review): the meaning of the <true> template argument is
    // defined in batch_matmul_custom_impl.h — confirm there.
    kernel.Process<true>(&tpipe, batchCount, batchCount);
}